In [1]:
 
import tensorflow as tf
In [2]:
 
import os
import sys
import collections
In [3]:
 
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
In [4]:
 
a = os.path.join(module_path, "strands_qsr_lib\qsr_lib\src3")
sys.path.append(a)
In [5]:
 
from rl import action_learner
In [6]:
 
from rl import value_estimator
In [7]:
 
from progress_learner import EventProgressEstimator
In [8]:
 
import config
import project
# Need to add this import to load class
from project import Project
In [9]:
 
from importlib import reload
In [10]:
 
# To load this, I have to add pickle.load(f, encoding='latin-1')
# Otherwise it would give the bug
# https://stackoverflow.com/questions/21129020/how-to-fix-unicodedecodeerror-ascii-codec-cant-decode-byte
p = project.Project.load("../slidearound_hopstep_1.proj")
In [11]:
 
reload(config)
    
c = config.Config()
In [12]:
 
from rl import block_movement_env
In [48]:
 
reload(action_learner)
Out[48]:
<module 'rl.action_learner' from 'D:\\git\\learn-to-perform\\rl\\action_learner.py'>
In [77]:
 
reload(value_estimator)
Out[77]:
<module 'rl.value_estimator' from 'D:\\git\\learn-to-perform\\rl\\value_estimator.py'>
In [33]:
 
p.speed
Out[33]:
0.083696821297196897
In [13]:
 
import matplotlib
from matplotlib import pyplot as plt
%matplotlib inline
 
## Test progress_estimator in a simulator

Test progress_estimator in a simulator

In [163]:
x
reload(block_movement_env)
tf.reset_default_graph()
sess =  tf.Session()
with tf.variable_scope("model") as scope:
    print('-------- Load progress model ---------')
    progress_estimator = EventProgressEstimator(is_training=False, name = p.name, config = c)  
saver = tf.train.Saver()
saver.restore(sess, '../progress.mod')
env = block_movement_env.BlockMovementEnv(c, p.speed, name = 'SlideAround', 
                       progress_estimator = progress_estimator, session = sess)
-------- Load progress model ---------
self.inputs.shape = (?, 20, 100)  after linear layer
output.shape = (10, 100) after LSTM
self.output.shape = (10,) after linear
self._targets.shape = (?,) 
INFO:tensorflow:Restoring parameters from ../progress.mod
In [164]:
env.default()
env.render()
In [165]:
 
env.step((0, [0.1, -0.7, 0.5]))
env.render()
Progress = 0.10 ; reward = 0.10
In [166]:
env.step((0, [0.2, -0.2, 0.5]))
env.render()
Progress = 0.34 ; reward = 0.24
In [167]:
env.step((0, [-0.17, 0.08, 0.5]))
env.render()
Progress = 0.70 ; reward = 0.36
In [168]:
env.step((0, [-0.3, 0, 0.5]))
env.render()
Progress = 0.76 ; reward = 0.05
In [169]:
 
env.replay()
Progress = 0.10 ; reward = 0.10
Progress = 0.10
Progress = 0.10
Progress = 0.34 ; reward = 0.24
Progress = 0.34
Progress = 0.34
Progress = 0.70 ; reward = 0.36
Progress = 0.70
Progress = 0.70
Progress = 0.76 ; reward = 0.05
Progress = 0.76
Progress = 0.76
 
## Run reinforce algorithm

Run reinforce algorithm

In [471]:
reload(config)
    
c = config.Config()
c.num_episodes = 500
c.constraint_sigma = 0
reload(block_movement_env)
reload(value_estimator)
reload(action_learner)
tf.reset_default_graph()
global_step = tf.Variable(0, name="global_step", trainable=False)
sess =  tf.Session()
policy_est = value_estimator.PolicyEstimator(c)
value_est = value_estimator.ValueEstimator(c)
sess.run(tf.global_variables_initializer())
with tf.variable_scope("model") as scope:
    print('-------- Load progress model ---------')
    progress_estimator = EventProgressEstimator(is_training=False, name = p.name, config = c)  
# Print out all variables that would be restored
for variable in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='model'):
    print (variable.name)
saver = tf.train.Saver(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='model'))
saver.restore(sess, '../progress.mod')
action_ln = action_learner.ActionLearner(c, p, progress_estimator, 
                               policy_est, value_est, session = sess)
action_policy = action_learner.random_action
past_envs, stats = action_ln.policy_learn(action_policy, breadth = 1, verbose = False,
                                          choice = 'ACTOR_CRITIC', default = True)
-------- Load progress model ---------
self.inputs.shape = (?, 20, 100)  after linear layer
output.shape = (10, 100) after LSTM
self.output.shape = (10,) after linear
self._targets.shape = (?,) 
model/SlideAround/linear/weight:0
model/SlideAround/linear/bias:0
model/SlideAround/lstm/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel:0
model/SlideAround/lstm/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/bias:0
model/SlideAround/output_linear/weight:0
model/SlideAround/output_linear/bias:0
INFO:tensorflow:Restoring parameters from ../progress.mod
Step 15 @ Episode 500/500 (0.6655352115631104)))
In [475]:
 
past_envs[499].env.replay()
(array([-0.04943158,  0.25645164,  2.00054121], dtype=float32), array([ 0.07826302,  0.15450571,  0.36097986], dtype=float32))
[ 0.04162052  0.22861052  2.06436275]
Progress = 0.41
(array([-0.37904954,  0.36121142,  2.83183146], dtype=float32), array([ 0.10941104,  0.2185127 ,  0.37859821], dtype=float32))
[-0.37593362  0.56613143  2.6496245 ]
Progress = 0.75
(array([-0.58489704,  0.64575297,  3.93091702], dtype=float32), array([ 0.1693645 ,  0.23454019,  0.39561927], dtype=float32))
[-0.73207446  0.57541993  4.07243637]
Progress = 0.61
(array([-0.63593268,  0.72196758,  5.31146955], dtype=float32), array([ 0.16210546,  0.23906255,  0.43264252], dtype=float32))
[-0.71459441  0.31634393  5.28895589]
Progress = 0.11
(array([-0.66795218,  0.7384395 ,  6.60602474], dtype=float32), array([ 0.14178075,  0.23816068,  0.45532885], dtype=float32))
[-0.64103156  0.84803205  6.7096721 ]
Progress = 0.49
(array([-0.82706654,  0.7647295 ,  7.89866066], dtype=float32), array([ 0.15921983,  0.27008566,  0.46821284], dtype=float32))
[-0.84963284  0.56440469  8.47182122]
Progress = 0.86
In [382]:
past_envs[60].env.replay()
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-382-633238f28680> in <module>()
----> 1 past_envs[60].env.replay()

IndexError: list index out of range

In [472]:
plotting.plot_episode_stats(stats, smoothing_window=5)
Out[472]:
(<matplotlib.figure.Figure at 0x215165415f8>,
 <matplotlib.figure.Figure at 0x21539ebdba8>,
 <matplotlib.figure.Figure at 0x21586f0d7b8>)
In [328]:
plotting.plot_episode_stats(stats, smoothing_window=5)
Out[328]:
(<matplotlib.figure.Figure at 0x21512689240>,
 <matplotlib.figure.Figure at 0x21514c11630>,
 <matplotlib.figure.Figure at 0x21515a69a20>)
In [331]:
x
plotting.plot_episode_stats(stats, smoothing_window=5)
Out[331]:
(<matplotlib.figure.Figure at 0x21515c3ec18>,
 <matplotlib.figure.Figure at 0x215156b8080>,
 <matplotlib.figure.Figure at 0x2151568a2b0>)
In [50]:
 
print (plotting.__file__)
D:\git\learn-to-perform\notebooks\plotting.py
In [245]:
import pickle
In [53]:
 
from plotting import EpisodeStats
In [57]:
 
with open('session.dat', 'rb') as f:
    stats = pickle.load(f, encoding='latin-1')
In [58]:
 
plotting.plot_episode_stats(stats, smoothing_window=5)
Out[58]:
(<matplotlib.figure.Figure at 0x1cfb650a828>,
 <matplotlib.figure.Figure at 0x1cfb6586a20>,
 <matplotlib.figure.Figure at 0x1cfb770cd68>)
In [84]:
x
with open('session.500.dat', 'rb') as f:
    stats = pickle.load(f, encoding='latin-1')
In [85]:
 
plotting.plot_episode_stats(stats, smoothing_window=5)
Out[85]:
(<matplotlib.figure.Figure at 0x1cfc7e5d710>,
 <matplotlib.figure.Figure at 0x1cfc7f1d4e0>,
 <matplotlib.figure.Figure at 0x1cfc8087e80>)
In [266]:
 
import glob
In [268]:
x
# This looks kind of good
# ..\session_data_actor_critic\session.data._0.0020_0.9500_0.0020_0.9700
for fn in glob.glob('..\session_data_actor_critic\*'):
    print (fn)
    with open(fn, 'rb') as f:
        stats = pickle.load(f, encoding='latin-1')
    plotting.plot_episode_stats(stats, smoothing_window=5)
..\session_data_actor_critic\session.data._0.0020_0.9500_0.0020_0.9500
<matplotlib.figure.Figure at 0x2157f9f6e48>
..\session_data_actor_critic\session.data._0.0020_0.9500_0.0020_0.9600
..\session_data_actor_critic\session.data._0.0020_0.9500_0.0020_0.9700
..\session_data_actor_critic\session.data._0.0020_0.9500_0.0200_0.9500
..\session_data_actor_critic\session.data._0.0020_0.9500_0.0200_0.9600
..\session_data_actor_critic\session.data._0.0020_0.9500_0.0200_0.9700
..\session_data_actor_critic\session.data._0.0020_0.9600_0.0020_0.9500
..\session_data_actor_critic\session.data._0.0020_0.9600_0.0020_0.9600
..\session_data_actor_critic\session.data._0.0020_0.9600_0.0020_0.9700
..\session_data_actor_critic\session.data._0.0020_0.9600_0.0200_0.9500
..\session_data_actor_critic\session.data._0.0020_0.9600_0.0200_0.9600
..\session_data_actor_critic\session.data._0.0020_0.9600_0.0200_0.9700
..\session_data_actor_critic\session.data._0.0020_0.9700_0.0020_0.9500
..\session_data_actor_critic\session.data._0.0020_0.9700_0.0020_0.9600
..\session_data_actor_critic\session.data._0.0020_0.9700_0.0020_0.9700
..\session_data_actor_critic\session.data._0.0020_0.9700_0.0200_0.9500
..\session_data_actor_critic\session.data._0.0020_0.9700_0.0200_0.9600
..\session_data_actor_critic\session.data._0.0020_0.9700_0.0200_0.9700
..\session_data_actor_critic\session.data._0.0200_0.9500_0.0020_0.9500
..\session_data_actor_critic\session.data._0.0200_0.9500_0.0020_0.9600
..\session_data_actor_critic\session.data._0.0200_0.9500_0.0020_0.9700
..\session_data_actor_critic\session.data._0.0200_0.9500_0.0200_0.9500
..\session_data_actor_critic\session.data._0.0200_0.9500_0.0200_0.9600
..\session_data_actor_critic\session.data._0.0200_0.9500_0.0200_0.9700
..\session_data_actor_critic\session.data._0.0200_0.9600_0.0020_0.9500
..\session_data_actor_critic\session.data._0.0200_0.9600_0.0020_0.9600
..\session_data_actor_critic\session.data._0.0200_0.9600_0.0020_0.9700
..\session_data_actor_critic\session.data._0.0200_0.9600_0.0200_0.9500
..\session_data_actor_critic\session.data._0.0200_0.9600_0.0200_0.9600
..\session_data_actor_critic\session.data._0.0200_0.9600_0.0200_0.9700
..\session_data_actor_critic\session.data._0.0200_0.9700_0.0020_0.9500
..\session_data_actor_critic\session.data._0.0200_0.9700_0.0020_0.9600
..\session_data_actor_critic\session.data._0.0200_0.9700_0.0020_0.9700
..\session_data_actor_critic\session.data._0.0200_0.9700_0.0200_0.9500
..\session_data_actor_critic\session.data._0.0200_0.9700_0.0200_0.9600
..\session_data_actor_critic\session.data._0.0200_0.9700_0.0200_0.9700
In [278]:
 
with open('..\\abc\session.data._0.0020_0.9500_0.0020_0.9600', 'rb') as f:
    stats = pickle.load(f, encoding='latin-1')
plotting.plot_episode_stats(stats, smoothing_window=5)
Out[278]:
(<matplotlib.figure.Figure at 0x21574d79048>,
 <matplotlib.figure.Figure at 0x21506720f98>,
 <matplotlib.figure.Figure at 0x2150673a438>)
In [296]:
 
import matplotlib
%matplotlib inline
import matplotlib.cm as cm
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
matplotlib.rcParams['xtick.direction'] = 'out'
matplotlib.rcParams['ytick.direction'] = 'out'
delta = 0.025
x = np.arange(-3.0, 3.0, delta)
y = np.arange(-2.0, 2.0, delta)
X, Y = np.meshgrid(x, y)
Z1 = mlab.bivariate_normal(X, Y, 1.0, 1.0, 0.0, 0.0)
Z2 = mlab.bivariate_normal(X, Y, 1.5, 0.5, 1, 1)
# difference of Gaussians
Z = 10.0 * (Z2 - Z1)
plt.figure()
CS = plt.contour(X, Y, Z2, 3)
plt.clabel(CS, inline=1, fontsize=10)
plt.title('Simplest default with labels')
Out[296]:
Text(0.5,1,'Simplest default with labels')
In [351]:
 
Z1
Out[351]:
array([[ 0.00023928,  0.00025784,  0.00027766, ...,  0.00029881,
         0.00027766,  0.00025784],
       [ 0.00025147,  0.00027097,  0.0002918 , ...,  0.00031404,
         0.0002918 ,  0.00027097],
       [ 0.00026411,  0.0002846 ,  0.00030647, ...,  0.00032983,
         0.00030647,  0.0002846 ],
       ..., 
       [ 0.00027722,  0.00029872,  0.00032168, ...,  0.0003462 ,
         0.00032168,  0.00029872],
       [ 0.00026411,  0.0002846 ,  0.00030647, ...,  0.00032983,
         0.00030647,  0.0002846 ],
       [ 0.00025147,  0.00027097,  0.0002918 , ...,  0.00031404,
         0.0002918 ,  0.00027097]])
In [389]:
 
import matplotlib.pyplot as plt
mean = [0, 0]
cov = [[1, 0], [0, 100]]
x, y = np.random.multivariate_normal(mean, cov, 100).T
plt.plot(x, y, 'x')
plt.axis('equal')
plt.show()
In [413]:
xxxxxxxxxx
 
a = np.array([2,3])
In [414]:
 
a ** 2
Out[414]:
array([4, 9], dtype=int32)
In [ ]: